Code
import eia_api as api
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as px
from ydata_profiling import ProfileReportThe goal of the backfill process is to pull the historical data for the required series using the settings.json file. This includes the following steps:
import eia_api as api
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as px
from ydata_profiling import ProfileReportraw_json = open("../settings/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]facets_template = {
"parent" : None,
"subba" : None
}
start = datetime.datetime(meta_json["start"]["year"],
meta_json["start"]["month"],
meta_json["start"]["day"],
meta_json["start"]["hour"])
end = datetime.datetime(meta_json["end"]["year"],
meta_json["end"]["month"],
meta_json["end"]["day"],
meta_json["end"]["hour"])
offset = 2250
eia_api_key = os.getenv('EIA_API_KEY')
meta_path = meta_json["meta_path"]
data_path = meta_json["data_path"]metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
print(metadata.meta.keys())
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])dict_keys(['id', 'name', 'description', 'frequency', 'facets', 'data', 'startPeriod', 'endPeriod', 'defaultDateFormat', 'defaultFrequency'])
2018-06-19T05
2024-06-22T07
for i in series.index:
facets = facets_template
facets["parent"] = series.at[i, "parent_id"]
facets["subba"] = series.at[i, "subba_id"]
print(facets)
temp = api.eia_backfill(api_key = eia_api_key,
api_path = api_path+ "data",
facets = facets,
start = start,
end = end,
offset = offset)
ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
ts_obj = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
ts_obj.drop("period", axis = 1, inplace= True)
ts_obj = ts_obj.rename(columns= {"index": "period"})
meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "backfill")
meta_temp["index"] = 1
meta_df = pd.DataFrame([meta_temp])
if i == series.index.start:
data = ts_obj
meta = meta_df
else:
data = data._append(ts_obj)
meta = meta._append(meta_df){'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
print(meta)
# The initial pull has some missing values
data.head() index parent subba time start \
0 1 CISO PGAE 2024-06-22 19:17:11.504732+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-06-22 19:17:30.010912+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-06-22 19:17:46.075978+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-06-22 19:18:03.131569+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
end_match n_obs na type update success \
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
0 True 52218 98 backfill False False
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
| period | subba | subba-name | parent | parent-name | value | value-units | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-07-01 08:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 12522.0 | megawatthours |
| 1 | 2018-07-01 09:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11745.0 | megawatthours |
| 2 | 2018-07-01 10:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11200.0 | megawatthours |
| 3 | 2018-07-01 11:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10822.0 | megawatthours |
| 4 | 2018-07-01 12:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10644.0 | megawatthours |
# Save the data
d = eia_data.append_data(data_path = data_path, new_data = data, init = True, save = True)
# Save the metadata
meta["success"] = True
meta["update"] = True
m = eia_data.append_metadata(meta_path = meta_path, meta = meta, save = True, init = True)
print(m)Initial data pull
Save the data to CSV file
index parent subba time start \
0 1 CISO PGAE 2024-06-22 19:17:11.504732+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-06-22 19:17:30.010912+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-06-22 19:17:46.075978+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-06-22 19:18:03.131569+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
0 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 01:00:00 True
end_match n_obs na type update success \
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
0 True 52218 98 backfill True True
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
We will use Plotly to visualize the series:
d = data.sort_values(by = ["subba", "period"])
p = px.line(d, x="period", y="value", color="subba")
p.show()profile = ProfileReport(d, title="Profiling Report")
profile